import json
import os
import random
import time
import urllib.request

import pymysql
from bs4 import BeautifulSoup

# Random startup jitter (<1s) so repeated runs don't hit the server in lockstep.
time.sleep(random.random())

# Example search endpoint (genre=comedy), kept for reference:
# url="https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=&start=0&genres=%E5%96%9C%E5%89%A7"

# Spoof a desktop-browser User-Agent so douban.com serves normal pages.
headers = ("User-Agent",
           "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36")

opener = urllib.request.build_opener()

opener.addheaders = [headers]

# Connect to the `movie` database. Keyword arguments are used because
# positional connect() parameters were removed in pymysql 1.0.
db = pymysql.connect(host="10.100.185.117", user="root",
                     password="gly123", database="movie")

# Get a cursor to execute the INSERT statements below.
cursor = db.cursor()

# Local directory where downloaded posters are stored.
path = 'C:/Users/wang/PycharmProjects/spider/图片'
path = path.strip()

path = path.rstrip("\\")
# exist_ok=True: plain os.makedirs(path) raised FileExistsError on every
# re-run, aborting the script before any scraping happened.
os.makedirs(path, exist_ok=True)

# Scrape two pages (20 movies each) of Douban's "悬疑" (suspense) search
# results, download each movie's poster, and insert one row per movie
# into the `suspense` table.
for i in range(2):
    try:
        # The endpoint returns JSON; results are paged in steps of 20
        # (this run starts at listing offset (i+41)*20).
        url_visit = 'https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=&start={}&genres=%E6%82%AC%E7%96%91'.format((i + 41) * 20)
        data = opener.open(url_visit).read()
        # The response body is JSON, not HTML: parse it directly instead of
        # wrapping it in BeautifulSoup and eval()-ing the text. eval() on a
        # remote response is a code-injection risk and also breaks on JSON
        # literals such as true/false/null.
        js = json.loads(data)
        for j in range(20):
            try:
                num = str(i * 20 + j + 1)
                print("第", i * 20 + j + 1, "次")  # progress: "item N"
                # json.loads already unescapes "\/" inside strings, so the
                # old .replace('\/', '/') workaround is unnecessary.
                url = js['data'][j]['url']
                detail_page = opener.open(url).read()
                soup = BeautifulSoup(detail_page, 'lxml')
                # First <img> on the detail page is the poster.
                img = soup.find(name='img').get('src')

                # Genres, e.g. "悬疑,惊悚," — trailing comma stripped with
                # [:-1] before the INSERT. (Renamed from `type`, which
                # shadowed the builtin.)
                genres = ""
                for k in soup.find_all(property="v:genre"):
                    genres = genres + k.string + ","
                # Directors.
                dirc = ""
                for k in soup.find_all(rel="v:directedBy"):
                    dirc = dirc + k.string + ","
                # First four starring actors. (The original also iterated
                # soup.find(rel="v:starring") first, but reset act="" inside
                # that loop, making it dead code — removed.)
                act = ""
                for k in soup.find_all(rel="v:starring")[:4]:
                    act = act + k.string + ","

                # Country and language are plain-text siblings of their
                # "pl"-classed labels; default to None if a label is absent
                # (previously an unbound-name error swallowed by bare except).
                country = None
                for k in soup.find_all(class_='pl'):
                    if k.string == "制片国家/地区:":
                        country = k.next_sibling
                        break
                lan = None
                for k in soup.find_all(class_='pl'):
                    if k.string == "语言:":
                        lan = k.next_sibling
                        break

                # Download the poster, then read it back as a BLOB.
                img_path = 'C:/Users/wang/PycharmProjects/spider/图片/%s.jpg' % (i * 20 + j + 1)
                urllib.request.urlretrieve(img, img_path)
                with open(img_path, "rb") as fp:
                    imge = fp.read()

                sql = """INSERT INTO suspense(id, name , dirc, act, type,country,lan,
                                            rate,img)VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
                try:
                    # Parameterized execute: pymysql escapes every value.
                    cursor.execute(sql, [num, js['data'][j]['title'], dirc[:-1],
                                         act[:-1], genres[:-1], country, lan,
                                         js['data'][j]['rate'], imge])
                    # Commit each row so earlier rows survive later failures.
                    db.commit()
                except pymysql.MySQLError as exc:
                    # Best-effort: report and continue, but roll back so the
                    # connection isn't left in a broken transaction.
                    print("insert failed for item", num, ":", exc)
                    db.rollback()
            except Exception as exc:
                # One bad movie page shouldn't abort the whole run, but the
                # old bare `except: pass` hid every error — log it instead.
                print("skipping item", i * 20 + j + 1, ":", exc)
    except Exception as exc:
        print("skipping page", i, ":", exc)
